# parameters

# Littorina positions: source URL and the local CSV the positions are written to.
lit_data_url = (
    "https://osis.geomar.de/underway/3ka2chb/api/v1/positions/Littorina2023"
    "?include_payloads=false"
    "&earliest_timestamp=2023-04-24"
)
lit_data_file = "lit_positions.csv"

# Buoy positions: zip archive to download and the local name it is saved under.
buoy_zip_url = "https://cloud.geomar.de/s/tNRg9raGEK357e4/download"
buoy_zip_file = "buoy_positions.zip"

# Directory the archive is unpacked into, the sub-directory the CSV files are
# read from, and the CSV file the filtered buoy data is written to.
buoy_data_path = "data/"
buoy_data_path_full = "data/2023-05-03_Drifter_Filedrop/"
buoy_data_file = "buoy_positions.csv"

# Only buoys with these ids are kept.
buoy_whitelist = [
    "D298",
    "D299",
    "D300",
    "D301",
    "D302",
    "D303",
]
!curl -o {buoy_zip_file} {buoy_zip_url}
% Total % Received % Xferd Average Speed Time Time Time Current
Dload Upload Total Spent Left Speed
100 2526k 0 2526k 0 0 13332 0 --:--:-- 0:03:14 --:--:-- 683
# Make sure the target directory exists (-p: no error if it is already there).
!mkdir -p {buoy_data_path}
# Unpack the downloaded archive into it (-o: overwrite without asking, -q: quiet).
!unzip -o -q {buoy_zip_file} -d {buoy_data_path}
import pandas as pd
import hvplot.pandas
from pathlib import Path
def _try_reading_csv(file):
    """Read ``file`` with ``pd.read_csv``; give back ``None`` if it has no data.

    Parameters
    ----------
    file
        Anything ``pd.read_csv`` accepts (path or file-like object).

    Returns
    -------
    pandas.DataFrame or None
        The parsed table, or ``None`` when pandas raises ``EmptyDataError``.
    """
    try:
        frame = pd.read_csv(file)
    except pd.errors.EmptyDataError:
        # Nothing to parse in this file; signal that to the caller.
        frame = None
    return frame
def load_all_buoy_csv_files(path=None):
    """Load all buoy CSV files found in ``path`` into one data frame.

    Every ``*.csv`` file directly inside ``path`` is read (files for which
    ``_try_reading_csv`` returns ``None`` are skipped), the tables are
    concatenated, exact duplicate records are removed and the result is
    sorted by buoy number and timestamp.

    Parameters
    ----------
    path : str or path-like
        Directory containing the buoy CSV files.

    Returns
    -------
    pandas.DataFrame
        ``D_number`` (as string) and ``date_UTC`` (as datetime) come first,
        followed by the remaining columns of the CSV files.

    Raises
    ------
    ValueError
        If no CSV file with content is found in ``path``.
    """
    files = sorted(Path(path).glob("*.csv"))
    _dfs = [_try_reading_csv(f) for f in files]
    _dfs = [_df for _df in _dfs if _df is not None]
    if not _dfs:
        # pd.concat would raise a ValueError as well, but without naming the path.
        raise ValueError(f"No non-empty CSV files found in {str(path)!r}")
    df = pd.concat(_dfs, ignore_index=True)
    df["date_UTC"] = pd.to_datetime(df["date_UTC"])
    df["D_number"] = df["D_number"].astype(int).astype(str)
    # De-duplicate while buoy number and timestamp are still ordinary columns.
    # DataFrame.drop_duplicates ignores the index, so calling it after
    # set_index would also discard distinct records (different buoy or time)
    # that merely share the values of all remaining columns.
    df = df.drop_duplicates()
    df = df.set_index(["D_number", "date_UTC"])
    df = df.sort_index()
    df = df.reset_index()
    return df
%%time
df_buoys = load_all_buoy_csv_files(
path=buoy_data_path_full,
)
df_buoys
CPU times: user 6.63 s, sys: 89.5 ms, total: 6.72 s Wall time: 6.72 s
| | D_number | date_UTC | Latitude | Longitude | U_speed_mps | U_Dir_deg | batteryState |
|---|---|---|---|---|---|---|---|
| 0 | 290 | 2023-05-05 16:26:21 | 54.48506 | 11.13692 | 0.060656 | -36.271504 | GOOD |
| 1 | 298 | 2023-04-20 10:04:49 | 54.32977 | 10.14909 | NaN | NaN | GOOD |
| 2 | 298 | 2023-04-20 10:09:52 | 54.32967 | 10.14905 | 0.037714 | -167.775106 | GOOD |
| 3 | 298 | 2023-04-20 10:14:50 | 54.32969 | 10.14911 | 0.015074 | 59.369044 | GOOD |
| 4 | 298 | 2023-04-20 10:19:47 | 54.32991 | 10.14860 | 0.138800 | -54.503193 | GOOD |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 12224 | 303 | 2023-05-09 08:40:47 | 54.93622 | 10.73319 | 0.085953 | 23.571389 | GOOD |
| 12225 | 303 | 2023-05-09 08:45:46 | 54.93580 | 10.73306 | 0.158797 | -171.313893 | GOOD |
| 12226 | 303 | 2023-05-09 09:20:47 | 54.93616 | 10.73313 | 0.019189 | 4.969256 | GOOD |
| 12227 | 303 | 2023-05-09 09:25:48 | 54.93605 | 10.73306 | 0.043317 | -161.295998 | GOOD |
| 12228 | 303 | 2023-05-09 09:40:44 | 54.93613 | 10.73317 | 0.012674 | 36.949253 | GOOD |
12229 rows × 7 columns
df_buoys = df_buoys.where(df_buoys.D_number.apply(lambda num: f"D{num}" in buoy_whitelist)).dropna()
df_buoys.to_csv(buoy_data_file, index=False)
!head -n5 {buoy_data_file}
D_number,date_UTC,Latitude,Longitude,U_speed_mps,U_Dir_deg,batteryState 298,2023-04-20 10:09:52,54.32967,10.14905,0.0377143858748649,-167.775106305755,GOOD 298,2023-04-20 10:14:50,54.32969,10.14911,0.0150742341143873,59.3690436747781,GOOD 298,2023-04-20 10:19:47,54.32991,10.1486,0.138800023585503,-54.5031934910538,GOOD 298,2023-04-20 10:24:52,54.32973,10.14894,0.0978186034069362,131.237675749866,GOOD
# Latest timestamp present in the buoy data.
df_buoys["date_UTC"].max()
Timestamp('2023-05-09 09:40:44')
import geopandas
/tmp/ipykernel_339/1529612126.py:1: UserWarning: Shapely 2.0 is installed, but because PyGEOS is also installed, GeoPandas will still use PyGEOS by default for now. To force to use and test Shapely 2.0, you have to set the environment variable USE_PYGEOS=0. You can do this before starting the Python process, or in your code before importing geopandas: import os os.environ['USE_PYGEOS'] = '0' import geopandas In a future release, GeoPandas will switch to using Shapely by default. If you are using PyGEOS directly (calling PyGEOS functions on geometries from GeoPandas), this will then stop working and you are encouraged to migrate from PyGEOS to Shapely 2.0 (https://shapely.readthedocs.io/en/latest/migration_pygeos.html). import geopandas
def read_littorina_positions(url=None):
    """Read the Littorina positions and add redacted coordinate columns.

    The resource at ``url`` is loaded with ``geopandas.read_file``.  The x and
    y of every geometry are stored as ``Longitude`` and ``Latitude`` and the
    ``geometry`` column is dropped.  ``Longitude_`` and ``Latitude_`` repeat
    these coordinates, rounded to one decimal place for rows whose
    ``datastream`` contains "MarineTraffic" and unchanged for all other rows.

    Parameters
    ----------
    url
        Location handed to ``geopandas.read_file``.

    Returns
    -------
    The loaded table without ``geometry`` and with the four coordinate
    columns described above.
    """
    track = geopandas.read_file(url)

    # Plain coordinate columns replace the geometry objects.
    track["Longitude"] = track.geometry.apply(lambda point: point.x)
    track["Latitude"] = track.geometry.apply(lambda point: point.y)
    track = track.drop(columns=["geometry"])

    # Redacted positions: the boolean masks act as 0/1 weights, so each row
    # takes either the rounded or the exact value.
    coarse = track.datastream.str.contains("MarineTraffic")
    exact = ~coarse
    for axis in ("Longitude", "Latitude"):
        values = track[axis]
        track[f"{axis}_"] = coarse * values.round(1) + exact * values

    return track
# Load the Littorina positions from lit_data_url.
df_lit = read_littorina_positions(url=lit_data_url)
# Write them to CSV without the data frame index.
# NOTE(review): the file contains the exact Longitude/Latitude next to the
# redacted Longitude_/Latitude_ columns — confirm that this is intended.
df_lit.to_csv(lit_data_file, index=False)
# Show the first lines of the written file as a quick check.
!head -n5 {lit_data_file}
id,context_shortname,datastream,import_info_id,import_time,obs_timestamp,platform_shortname,Longitude,Latitude,Longitude_,Latitude_ positions_for_context.12046da4-ab40-4f33-a47b-ea5583281213,Littorina2023,MarineTrafficConnector_gitlab,85d398b9-51ab-4a7a-841a-7fc3f4024060,2023-04-24 01:01:24.966000+00:00,2023-04-24 00:55:07+00:00,Littorina,10.18159,54.32806,10.2,54.3 positions_for_context.382a59b3-1cc1-4bdf-a959-3c765caf50e8,Littorina2023,MarineTrafficConnector_gitlab,2a6143de-7b8a-4197-8b34-90214a3a9fa1,2023-04-24 02:01:27.990000+00:00,2023-04-24 01:55:10+00:00,Littorina,10.18163,54.32806,10.2,54.3 positions_for_context.26e7b71c-9d4f-47c8-9337-b19673ae58d0,Littorina2023,MarineTrafficConnector_gitlab,17be6065-c029-46d3-b29c-c3d7284cbf5a,2023-04-24 03:01:10.069000+00:00,2023-04-24 02:55:14+00:00,Littorina,10.1816,54.32805,10.2,54.3 positions_for_context.ce96acdb-ca5f-46aa-b7b8-069c6bd19385,Littorina2023,MarineTrafficConnector_gitlab,8f51352e-debe-41db-9aae-95b1bad0d9cb,2023-04-24 04:01:13.361000+00:00,2023-04-24 03:58:09+00:00,Littorina,10.1816,54.32809,10.2,54.3